library(plyr)

#Functions to perform likelihood ratio tests for nested models with multiple imputatation. 
#The functions were built based on the pool.compare function from the mice R package (version <3).
#They implement the method developed by
#Meng, X. L., & Rubin, D. B. (1992). Performing likelihood ratio tests with multiply-imputed data sets. Biometrika, 79(1), 103–111. https://doi.org/10.1093/biomet/79.1.103

LLCox <- function(formul, data, coefs, survObject){
  #Calculates the partila likelihood based on the Efron Method, function is a modifie version of pool.compare in mice
  model.matrix.out_t <- model.matrix(formul, data)[, !grepl('(Intercept)', colnames(model.matrix(formul, data)))]
  model.matrix.out <- model.matrix.out_t[, !grepl('strata\\(', colnames(model.matrix.out_t))]
  colnames(model.matrix.out)
  names(coefs)
  Xb <- model.matrix.out %*% coefs
  # print ('hi')
  wrkdf <- data.frame (
    Xb = Xb,
    haz.rates = exp(Xb),
    T = survObject[, 1],
    delta = survObject[, 2])
  
  if (ncol(survObject) == 3){
    wrkdf$T1 <- survObject[, 2]
    wrkdf$delta <- survObject[, 3]
  }
  Stratif <- FALSE
  if (any(grepl('strata', colnames(model.matrix(formul, data))))){
    Stratif <- TRUE
    strat_t <- colnames(model.matrix(formul, data))[grepl('strata', colnames(model.matrix(formul, data)))][1]
    strat_t <- gsub('strata\\(', '', strat_t)
    strat <- gsub('\\).+', '', strat_t)
    wrkdf$strata <- data[, strat]
    wrkdf$T.strat <- paste('Strat', wrkdf$strata, '.T', wrkdf$T, sep = '')
  }
  
  #D(t(i)) is the number of individuals that fail at time T
  if (Stratif == FALSE){
    fail.times <- plyr::ddply(wrkdf[wrkdf$delta==1,], .(T), plyr::summarize, 
                        product = prod(haz.rates), 
                        sum = sum(haz.rates), 
                        Di = length(T),
                        denom = NA)
  }else{
    fail.times <- plyr::ddply(wrkdf[wrkdf$delta==1, ], .(T.strat), plyr::summarize, 
                        product = prod(haz.rates), 
                        sum = sum(haz.rates), 
                        Di = length(T),
                        denom = NA,
                        strata = unique(strata),
                        T = unique(T))
  }
  for (i in 1:nrow(fail.times)){
    Di <- fail.times[i, 'Di']
    T <- fail.times[i, 'T']
    if (Stratif == TRUE){
      stratum <- fail.times[i, 'strata']
      T.stratum <- fail.times[i, 'T.strat']
    }
    if (ncol(survObject) == 2){
      if (Stratif == FALSE){
        denom.fixed <- sum(wrkdf[wrkdf$T >= T, 'haz.rates'])
      }else{
        denom.fixed <- sum(wrkdf[wrkdf$T >= T & wrkdf$strata == stratum, 'haz.rates'])
      }
    }
    if (ncol(survObject) == 3){
      if (Stratif == FALSE){
        denom.fixed <- sum(wrkdf[wrkdf$T == T, 'haz.rates'])
      }else{
        denom.fixed <- sum(wrkdf[wrkdf$T.strat == T.stratum, 'haz.rates'])
      }
    }
    denom <- NULL
    for (k in (1:Di)){
      denom <- c(denom, denom.fixed - (k-1)/Di*fail.times[i, 'sum'] )
    }
    fail.times$denom[i] <- prod(denom)
  }
  part.log.lik <- sum(log(fail.times$product/fail.times$denom))
  part.log.lik
  dfault.ll <- summary(coxph(formul, data), tie = 'efron')$loglik[2]
  dfault.ll    
  # print (paste('Custom LL:', part.log.lik, ' vs default LL:', dfault.ll))
  return(2 * part.log.lik)# return twice the likelihood
}


coxLRtest <- function (fit1, fit0) {
  call <- match.call()
  # do.call(survObject)
  # if (!is.mira(fit1) | !is.mira(fit0)) 
  #     stop("fit1 and fit0 should both have class 'mira'.\n")
  m1 <- nrow(fit1$qhat)
  m0 <- nrow(fit0$qhat)
  if (m1 != m0) 
    stop("Number of imputations differs between fit1 and fit0.\n")
  if (m1 < 2) 
    stop("At least two imputations are needed for pooling.\n")
  m <- m1
  est1.qbar <- fit1$beta
  est0.qbar <- fit0$beta
  est1.qhat <- fit1$qhat
  est0.qhat <- fit0$qhat
  
  dimQ1 <- length(est1.qbar)
  dimQ2 <- dimQ1 - length(est0.qbar)
  formula1 <- fit1$formul
  formula0 <- fit0$formul
  vars1 = names(est1.qbar)
  vars0 = names(est0.qbar)
  if (is.null(vars1) | is.null(vars0)) 
    stop("coefficients do not have names")
  if (dimQ2 < 1) 
    stop("The larger model should be specified first and must be strictly larger than the smaller model.\n")
  if (!setequal(vars0, intersect(vars0, vars1))) 
    stop("The smaller model should be fully contained in the larger model. \n")
  #if (is.null(data)) 
  #    stop("For method=likelihood the imputed data set (a mids object) should be included.\n")
  devM <- devL <- 0
  for (i in (1:m)) {
    dat1 <- fit1$imputed.data[[i]]
    dat0 <- fit0$imputed.data[[i]]
    devL <- devL + LLCox(formul = formula1, data = dat1, coefs = est1.qbar, survObject = fit1$survObject[[i]]) - LLCox(formula0, dat0, est0.qbar, fit0$survObject[[i]])
    devM <- devM + LLCox(formula1, dat1, est1.qhat[i, ], fit1$survObject[[i]]) - LLCox(formula0, dat0, est0.qhat[i, ], fit0$survObject[[i]])
  }
  devL <- devL/m
  devM <- devM/m
  rm <- ((m + 1)/(dimQ2 * (m - 1))) * (devM - devL)
  Dm <- devL/(dimQ2 * (1 + rm))
  
  v <- dimQ2 * (m - 1)
  if (v > 4) {
    w <- 4 + (v - 4) * ((1 + (1 - 2/v) * (1/rm))^2)
  } else {w <- v * (1 + 1/dimQ2) * ((1 + 1/rm)^2)/2}
  statistic <- c(
    pvalue = 1 - pf(Dm, dimQ2, w))
  return(statistic)
}
